import scrapy
import json
from sh.items import HyItem

class HuyaSpider(scrapy.Spider):
    """Spider that reads Huya's live-list JSON endpoint and emits ``HyItem``s.

    Each response is expected to be a JSON object shaped like
    ``{"data": {"datas": [{"nick": ...}, ...]}}``; one item is yielded per
    entry of ``datas``, carrying only the ``nick`` field.
    """

    name = 'huya'

    def start_requests(self):
        """Yield the initial requests for the live-list endpoint.

        ``dont_filter=True`` makes the requests bypass Scrapy's duplicate
        request filter, so every URL in the list is fetched even if repeated.
        """
        # NOTE(review): both entries are the same ``page=1`` URL, so the page
        # is downloaded twice. Confirm whether this is a deliberate
        # de-duplication experiment or the second entry should be another page.
        urls = [
            'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=1663&tagAll=0&page=1',
            'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=1663&tagAll=0&page=1',
        ]
        for url in urls:
            # No callback is given, so responses are handled by ``parse``.
            yield scrapy.Request(url=url, dont_filter=True)

    def parse(self, response):
        """Turn one live-list JSON response into ``HyItem`` objects.

        Args:
            response: The response whose text is the JSON payload.

        Yields:
            HyItem: One item per entry in ``data.datas`` with ``nick`` set
            (``None`` when the entry has no ``nick`` key).

        Raises:
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        self.logger.info('Parsing %s', response.url)
        payload = json.loads(response.text)

        # The payload may lack ``data``/``datas`` (or carry null there), so
        # check each level instead of chaining ``.get()`` calls on a possible
        # ``None``.
        data = payload.get('data') if isinstance(payload, dict) else None
        rooms = data.get('datas') if isinstance(data, dict) else None
        if not rooms:
            self.logger.warning('No live-list entries in response from %s', response.url)
            return

        for room in rooms:
            item = HyItem()
            item['nick'] = room.get('nick')
            yield item


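'''
Two ways to de-duplicate:
  1. Before requesting: de-duplicate the URLs to be fetched.
  2. After requesting: de-duplicate the scraped data before it is stored,
     e.g. with Python's built-in set.
'''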